PDB to Mole file converter Simon Kilvington, 1993 see "Protein Data Bank Atomic Coordinate Entry Format Description" published by Brookhaven National Laboratory, for a description of the PDB file format version$="v0.02 (21-Apr-93)" @maxatoms=512: you can adjust this if you have enough memory atomname$(50),connectdist(50),atmrad(50),atmcol%(50) serial%(maxatoms),name$(maxatoms),residue$(maxatoms),resseq%(maxatoms) xpos(maxatoms),ypos(maxatoms),zpos(maxatoms) connect%(maxatoms),connectto%(maxatoms,3) read in connection distance data Edisterr=1: error in coordinates (used later in connecting atoms) ("PDBFiles.connect") line$= getline(H%) line$<>"" !atomname$(A%)= getword(line$) (atomname$(A%))=1 atomname$(A%)=" "+atomname$(A%) :connectdist(A%)= getword(line$))*100: convert to pm !atmrad(A%)= getword(line$)) +atmcol%(A%)= ("&"+ getword(line$)+"00") A%+=1 lasttype%=A% get the input and ouput file names "PDB to Mole file converter "+version$ "==========================="+ (version$),"=") '"PDB files are taken from directory 'PDBFiles.pdb'" "Mole files are saved in directory 'MoleFiles'" '"*** WARNING *** dont use this to convert large protein files as it is not finished yet!" '"PDB file leafname : "pdbfile$ "Mole file leafname : "molefile$ '"Reading in PDB data..." 2"H%= ("PDBFiles.pdb."+pdbfile$) A%=0:C%=0 record$= getline(H%) record$,6) "ATOM ","HETATM" 8 serial%(A%)= record$,7,5)) name$(A%)= record$,13,4) residue$(A%)= record$,18,3) ;!resseq%(A%)= record$,23,4)) <2xpos(A%)= record$,31,8))*100: convert to pm ="ypos(A%)= record$,39,8))*100 >"zpos(A%)= record$,47,8))*100 ? A%+=1 "SSBOND" "Record type 'SSBOND' not yet supported, so ignored" "CONECT" C!connect%(C%)= record$,7,5)) T%=0 record$,12+T%*5,5) to$=" " connectto%(C%,T%)=-1 connectto%(C%,T%)= (to$) H C%+=1 "TER " "Record type 'TER ' not yet supported, so ignored" "REMARK","FTNOTE" record$ "Unknown record type '"; record$,6);"' ignored" lastatom%=A% lastconnect%=C% '"Building Mole file..." decpt=8 hdrsize%=4+4+4+8+4+12+12+4 atmsize%=12+4+4+4+16 data% hdrsize%+lastatom%*atmsize% dataend%=data% set up a default header ^>!dataend%=&454C4F4D:dataend%!4=200: id word, file version _,dataend%!8=&00424450: title 'PDB'+CHR$0 `Bdataend%!12=&00000000:dataend%!16=&00BBFF00: bg and bond cols a8dataend%!20=1<="A" symbol2$<="Z" symbol$=" "+symbol2$ symbol$=symbol1$+symbol2$ p B%=0 atomname$(B%)<>symbol$ B%+=1 B%=lasttype% "Unknown atom type '"+symbol$+"'": u& dataend%!12=atmrad(B%)*(1<serial%(A%) C%+=1 val%=0 C%-1 val%+=1 see if we have a template for this residue type !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! this inability to use connectivity templates means it will take an incredibly long time to convert large protein data files, and even when it does eventually finish it may well have added extra bonds to the model !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! no template so use the distance data to see whats connected to what atm%=0 lastatom%-1 atm%<>A% symbol1$= name$(atm%),1) " symbol2$= name$(atm%),2,1) X symbol2$>="A" symbol2$<="Z" atmsym$=" "+symbol2$ atmsym$=symbol1$+symbol2$ R%=0 ! atomname$(R%)<>atmsym$ R%+=1 < R%=lasttype% "Unknown atom type '"+atmsym$+"'": dx=xpos(atm%)-xpos(A%) dy=ypos(atm%)-ypos(A%) dz=zpos(atm%)-zpos(A%) N (dx*dx+dy*dy+dz*dz) < (connectdist(B%)+connectdist(R%)+2*disterr) * connectto%(C%,val%)=serial%(atm%) val%+=1 val%=0 val%>4 "Atom ";A%;" is unconnected or over connected": 0 dataend%!20=((val%-1)<<16)+val%: info word symbol$=" H" dataend%!20+=&80000000 dataend%+=24 B%=0 V%=1 val% connectto%(C%,B%)=-1 B%+=1 atom%=0 serial%(atom%)<>connectto%(C%,B%) atom%+=1 atom%=lastatom% "Atom ";A%;" is connected to a non-existant atom": ! !dataend%=atom%:dataend%+=4 B%+=1 A%+=1 A%=lastatom% '"Saving as 'MoleFiles."+molefile$+"'" ("Save MoleFiles."+molefile$+" "+ ~data%+" "+ ~dataend%) ("SetType MoleFiles."+molefile$+" Mole") '"You may need to recentre the molecule before you can see it in Mole" getline(H%) line$="" byte%= byte%>=32 line$+= byte% byte%= =line$ getword( T$,1)=" " T$,2) W$="" (T$)>0 T$,1)<>" " T$,1) T$,2)